From d4f4d9761cbd41c3ab6de79383ff39b9f97bf452 Mon Sep 17 00:00:00 2001
From: Syrone Wong <wong.syrone@gmail.com>
Date: Sat, 18 Nov 2017 20:06:50 +0800
Subject: [PATCH] Upgrade PCRE to PCRE2

- Use 8bit variant by default

This comes from a PR that was closed and never reopened because, at the
time, PCRE2 was considered too new(???).

Ref: https://github.com/shadowsocks/shadowsocks-libev/pull/1792
Signed-off-by: Syrone Wong <wong.syrone@gmail.com>
[ squash the first 2 patches from the PR, drop the last one ]
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 .travis.yml  |   9 ++-
 configure.ac |   8 +--
 m4/pcre.m4   | 152 ------------------------------------------
 m4/pcre2.m4  | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/rule.c   |  53 ++++++++++++---
 src/rule.h   |  23 +++++--
 6 files changed, 253 insertions(+), 173 deletions(-)
 delete mode 100644 m4/pcre.m4
 create mode 100644 m4/pcre2.m4

# diff --git a/.travis.yml b/.travis.yml
# index ee3424c..e7da08c 100644
# --- a/.travis.yml
# +++ b/.travis.yml
# @@ -11,11 +11,12 @@ env:
#      global:
#          - LIBSODIUM_VER=1.0.12
#          - MBEDTLS_VER=2.4.0
# +        - PCRE2_VER=10.30
#  before_install:
#      - |
#        if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
#            # All dependencies for macOS build. Some packages has been installed by travis so use reinstall.
# -          brew reinstall autoconf automake xmlto c-ares libev mbedtls libsodium asciidoc >> /dev/null 2>&1;
# +          brew reinstall autoconf automake xmlto pcre2 c-ares libev mbedtls libsodium asciidoc >> /dev/null 2>&1;
#        else
#            wget  https://github.com/jedisct1/libsodium/releases/download/$LIBSODIUM_VER/libsodium-$LIBSODIUM_VER.tar.gz;
#            tar xvf libsodium-$LIBSODIUM_VER.tar.gz;
# @@ -29,6 +30,12 @@ before_install:
#            make SHARED=1;
#            sudo make install;
#            popd;
# +          wget https://ftp.pcre.org/pub/pcre/pcre2-$PCRE2_VER.tar.gz;
# +          tar xvf pcre2-$PCRE2_VER.tar.gz;
# +          pushd pcre2-$PCRE2_VER;
# +          ./configure --prefix=/usr --enable-pcre2-16 --enable-pcre2-32 && make;
# +          sudo make install;
# +          popd;
#            # Load cached docker images
#            if [[ -d $HOME/docker ]]; then
#                ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load";
--- a/configure.ac
+++ b/configure.ac
@@ -20,10 +20,10 @@ AC_DISABLE_STATIC
 AC_DISABLE_SHARED
 LT_INIT([dlopen])
 
-dnl Check for pcre library
-TS_CHECK_PCRE
-if test "x${enable_pcre}" != "xyes"; then
-  AC_MSG_ERROR([Cannot find pcre library. Configure --with-pcre=DIR])
+dnl Check for pcre2 library
+TS_CHECK_PCRE2
+if test "x${enable_pcre2}" != "xyes"; then
+  AC_MSG_ERROR([Cannot find pcre2 library. Configure --with-pcre2=DIR])
 fi
 
 dnl Checks for using shared libraries from system
--- a/m4/pcre.m4
+++ /dev/null
@@ -1,152 +0,0 @@
-dnl -------------------------------------------------------- -*- autoconf -*-
-dnl Licensed to the Apache Software Foundation (ASF) under one or more
-dnl contributor license agreements.  See the NOTICE file distributed with
-dnl this work for additional information regarding copyright ownership.
-dnl The ASF licenses this file to You under the Apache License, Version 2.0
-dnl (the "License"); you may not use this file except in compliance with
-dnl the License.  You may obtain a copy of the License at
-dnl
-dnl     http://www.apache.org/licenses/LICENSE-2.0
-dnl
-dnl Unless required by applicable law or agreed to in writing, software
-dnl distributed under the License is distributed on an "AS IS" BASIS,
-dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-dnl See the License for the specific language governing permissions and
-dnl limitations under the License.
-
-dnl
-dnl TS_ADDTO(variable, value)
-dnl
-dnl  Add value to variable
-dnl
-AC_DEFUN([TS_ADDTO], [
-  if test "x$$1" = "x"; then
-    test "x$verbose" = "xyes" && echo "  setting $1 to \"$2\""
-    $1="$2"
-  else
-    ats_addto_bugger="$2"
-    for i in $ats_addto_bugger; do
-      ats_addto_duplicate="0"
-      for j in $$1; do
-        if test "x$i" = "x$j"; then
-          ats_addto_duplicate="1"
-          break
-        fi
-      done
-      if test $ats_addto_duplicate = "0"; then
-        test "x$verbose" = "xyes" && echo "  adding \"$i\" to $1"
-        $1="$$1 $i"
-      fi
-    done
-  fi
-])dnl
-
-dnl
-dnl TS_ADDTO_RPATH(path)
-dnl
-dnl   Adds path to variable with the '-rpath' directive.
-dnl
-AC_DEFUN([TS_ADDTO_RPATH], [
-  AC_MSG_NOTICE([adding $1 to RPATH])
-  TS_ADDTO(LIBTOOL_LINK_FLAGS, [-R$1])
-])dnl
-
-dnl
-dnl pcre.m4: Trafficserver's pcre autoconf macros
-dnl
-
-dnl
-dnl TS_CHECK_PCRE: look for pcre libraries and headers
-dnl
-AC_DEFUN([TS_CHECK_PCRE], [
-enable_pcre=no
-AC_ARG_WITH(pcre, [AC_HELP_STRING([--with-pcre=DIR],[use a specific pcre library])],
-[
-  if test "x$withval" != "xyes" && test "x$withval" != "x"; then
-    pcre_base_dir="$withval"
-    if test "$withval" != "no"; then
-      enable_pcre=yes
-      case "$withval" in
-      *":"*)
-        pcre_include="`echo $withval |sed -e 's/:.*$//'`"
-        pcre_ldflags="`echo $withval |sed -e 's/^.*://'`"
-        AC_MSG_CHECKING(checking for pcre includes in $pcre_include libs in $pcre_ldflags )
-        ;;
-      *)
-        pcre_include="$withval/include"
-        pcre_ldflags="$withval/lib"
-        AC_MSG_CHECKING(checking for pcre includes in $withval)
-        ;;
-      esac
-    fi
-  fi
-],
-[
-  AC_CHECK_PROG(PCRE_CONFIG, pcre-config, pcre-config)
-  if test "x$PCRE_CONFIG" != "x"; then
-    enable_pcre=yes
-    pcre_base_dir="`$PCRE_CONFIG --prefix`"
-    pcre_include="`$PCRE_CONFIG --cflags | sed -es/-I//`"
-    pcre_ldflags="`$PCRE_CONFIG --libs | sed -es/-lpcre// -es/-L//`"
-  fi
-])
-
-if test "x$pcre_base_dir" = "x"; then
-  AC_MSG_CHECKING([for pcre location])
-  AC_CACHE_VAL(ats_cv_pcre_dir,[
-  for dir in /usr/local /usr ; do
-    if test -d $dir && ( test -f $dir/include/pcre.h || test -f $dir/include/pcre/pcre.h ); then
-      ats_cv_pcre_dir=$dir
-      break
-    fi
-  done
-  ])
-  pcre_base_dir=$ats_cv_pcre_dir
-  if test "x$pcre_base_dir" = "x"; then
-    enable_pcre=no
-    AC_MSG_RESULT([not found])
-  else
-    enable_pcre=yes
-    pcre_include="$pcre_base_dir/include"
-    pcre_ldflags="$pcre_base_dir/lib"
-    AC_MSG_RESULT([$pcre_base_dir])
-  fi
-else
-  AC_MSG_CHECKING(for pcre headers in $pcre_include)
-  if test -d $pcre_include && test -d $pcre_ldflags && ( test -f $pcre_include/pcre.h || test -f $pcre_include/pcre/pcre.h ); then
-    AC_MSG_RESULT([ok])
-  else
-    AC_MSG_RESULT([not found])
-  fi
-fi
-
-pcreh=0
-pcre_pcreh=0
-if test "$enable_pcre" != "no"; then
-  saved_ldflags=$LDFLAGS
-  saved_cppflags=$CFLAGS
-  pcre_have_headers=0
-  pcre_have_libs=0
-  if test "$pcre_base_dir" != "/usr"; then
-    TS_ADDTO(CFLAGS, [-I${pcre_include}])
-    TS_ADDTO(CFLAGS, [-DPCRE_STATIC])
-    TS_ADDTO(LDFLAGS, [-L${pcre_ldflags}])
-    TS_ADDTO_RPATH(${pcre_ldflags})
-  fi
-  AC_SEARCH_LIBS([pcre_exec], [pcre], [pcre_have_libs=1])
-  if test "$pcre_have_libs" != "0"; then
-    AC_CHECK_HEADERS(pcre.h, [pcre_have_headers=1])
-    AC_CHECK_HEADERS(pcre/pcre.h, [pcre_have_headers=1])
-  fi
-  if test "$pcre_have_headers" != "0"; then
-    AC_DEFINE(HAVE_LIBPCRE,1,[Compiling with pcre support])
-    AC_SUBST(LIBPCRE, [-lpcre])
-  else
-    enable_pcre=no
-    CFLAGS=$saved_cppflags
-    LDFLAGS=$saved_ldflags
-  fi
-fi
-AC_SUBST(pcreh)
-AC_SUBST(pcre_pcreh)
-])
--- /dev/null
+++ b/m4/pcre2.m4
@@ -0,0 +1,181 @@
+dnl -------------------------------------------------------- -*- autoconf -*-
+dnl Licensed to the Apache Software Foundation (ASF) under one or more
+dnl contributor license agreements.  See the NOTICE file distributed with
+dnl this work for additional information regarding copyright ownership.
+dnl The ASF licenses this file to You under the Apache License, Version 2.0
+dnl (the "License"); you may not use this file except in compliance with
+dnl the License.  You may obtain a copy of the License at
+dnl
+dnl     http://www.apache.org/licenses/LICENSE-2.0
+dnl
+dnl Unless required by applicable law or agreed to in writing, software
+dnl distributed under the License is distributed on an "AS IS" BASIS,
+dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+dnl See the License for the specific language governing permissions and
+dnl limitations under the License.
+
+dnl Modified by Syrone Wong <wong.syrone@gmail.com> to support pcre2 8bit variant only
+
+dnl
+dnl TS_ADDTO(variable, value)
+dnl
+dnl  Add value to variable
+dnl
+AC_DEFUN([TS_ADDTO], [
+  if test "x$$1" = "x"; then
+    test "x$verbose" = "xyes" && echo "  setting $1 to \"$2\""
+    $1="$2"
+  else
+    ats_addto_bugger="$2"
+    for i in $ats_addto_bugger; do
+      ats_addto_duplicate="0"
+      for j in $$1; do
+        if test "x$i" = "x$j"; then
+          ats_addto_duplicate="1"
+          break
+        fi
+      done
+      if test $ats_addto_duplicate = "0"; then
+        test "x$verbose" = "xyes" && echo "  adding \"$i\" to $1"
+        $1="$$1 $i"
+      fi
+    done
+  fi
+])dnl
+
+dnl
+dnl TS_ADDTO_RPATH(path)
+dnl
+dnl   Adds path to variable with the '-rpath' directive.
+dnl
+AC_DEFUN([TS_ADDTO_RPATH], [
+  AC_MSG_NOTICE([adding $1 to RPATH])
+  TS_ADDTO(LIBTOOL_LINK_FLAGS, [-R$1])
+])dnl
+
+dnl
+dnl pcre2.m4: Trafficserver's pcre2 autoconf macros
+dnl
+
+dnl
+dnl TS_CHECK_PCRE2: look for pcre2 libraries and headers
+dnl
+AC_DEFUN([TS_CHECK_PCRE2], [
+enable_pcre2=no
+AC_ARG_WITH(pcre2, [AS_HELP_STRING([--with-pcre2=DIR],[use a specific pcre2 library])],
+[
+  if test "x$withval" != "xyes" && test "x$withval" != "x"; then
+    pcre2_base_dir="$withval"
+    if test "$withval" != "no"; then
+      enable_pcre2=yes
+      dnl The value is either a prefix DIR or an INCLUDE_DIR:LIB_DIR pair.
+      case "$withval" in
+      *":"*)
+        pcre2_include="`echo $withval |sed -e 's/:.*$//'`"
+        pcre2_ldflags="`echo $withval |sed -e 's/^.*://'`"
+        AC_MSG_NOTICE([using pcre2 includes in $pcre2_include, libs in $pcre2_ldflags])
+        ;;
+      *)
+        pcre2_include="$withval/include"
+        pcre2_ldflags="$withval/lib"
+        AC_MSG_NOTICE([using pcre2 installation in $withval])
+        ;;
+      esac
+    fi
+  fi
+],
+[
+  AC_CHECK_PROG(PCRE2_CONFIG, pcre2-config, pcre2-config)
+  if test "x$PCRE2_CONFIG" != "x"; then
+    enable_pcre2=yes
+    pcre2_base_dir="`$PCRE2_CONFIG --prefix`"
+    pcre2_include="`$PCRE2_CONFIG --cflags | sed -es/-I//`"
+    pcre2_ldflags="`$PCRE2_CONFIG --libs8 | sed -es/-lpcre2-8// -es/-L//`"
+  fi
+])
+
+if test "x$pcre2_base_dir" = "x"; then
+  AC_MSG_CHECKING([for pcre2 location])
+  AC_CACHE_VAL(ats_cv_pcre2_dir,[
+  for dir in /usr/local /usr ; do
+    if test -d $dir && ( test -f $dir/include/pcre2.h || test -f $dir/include/pcre2/pcre2.h ); then
+      ats_cv_pcre2_dir=$dir
+      break
+    fi
+  done
+  ])
+  pcre2_base_dir=$ats_cv_pcre2_dir
+  if test "x$pcre2_base_dir" = "x"; then
+    enable_pcre2=no
+    AC_MSG_RESULT([not found])
+  else
+    enable_pcre2=yes
+    pcre2_include="$pcre2_base_dir/include"
+    pcre2_ldflags="$pcre2_base_dir/lib"
+    AC_MSG_RESULT([$pcre2_base_dir])
+  fi
+else
+  AC_MSG_CHECKING(for pcre2 headers in $pcre2_include)
+  if test -d $pcre2_include && test -d $pcre2_ldflags && ( test -f $pcre2_include/pcre2.h || test -f $pcre2_include/pcre2/pcre2.h ); then
+    AC_MSG_RESULT([ok])
+  else
+    AC_MSG_RESULT([not found])
+  fi
+fi
+
+pcre2h=0
+pcre2_pcre2h=0
+if test "$enable_pcre2" != "no"; then
+  saved_ldflags=$LDFLAGS
+  saved_cppflags=$CFLAGS
+  pcre2_have_headers=0
+  pcre2_have_libs=0
+  if test "$pcre2_base_dir" != "/usr"; then
+    TS_ADDTO(CFLAGS, [-I${pcre2_include}])
+    TS_ADDTO(CFLAGS, [-DPCRE2_STATIC])
+    TS_ADDTO(LDFLAGS, [-L${pcre2_ldflags}])
+    TS_ADDTO_RPATH(${pcre2_ldflags})
+  fi
+  AC_SEARCH_LIBS([pcre2_match_8], [pcre2-8], [pcre2_have_libs=1])
+  if test "$pcre2_have_libs" != "0"; then
+    dnl PCRE2_CODE_UNIT_WIDTH must be defined before pcre2.h is included.
+    AC_MSG_CHECKING([pcre2.h])
+    AC_COMPILE_IFELSE(
+      [AC_LANG_PROGRAM(
+        [[
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+        ]],
+        [[]]
+      )],
+      [pcre2_have_headers=1
+      AC_MSG_RESULT([ok])],
+      [AC_MSG_RESULT([not found])]
+    )
+
+    AC_MSG_CHECKING([pcre2/pcre2.h])
+    AC_COMPILE_IFELSE(
+      [AC_LANG_PROGRAM(
+        [[
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2/pcre2.h>
+        ]],
+        [[]]
+      )],
+      [pcre2_have_headers=1
+      AC_MSG_RESULT([ok])],
+      [AC_MSG_RESULT([not found])]
+    )
+  fi
+  if test "$pcre2_have_headers" != "0"; then
+    AC_DEFINE(HAVE_LIBPCRE2,1,[Compiling with pcre2 support])
+    AC_SUBST(LIBPCRE2, [-lpcre2-8])
+  else
+    enable_pcre2=no
+    CFLAGS=$saved_cppflags
+    LDFLAGS=$saved_ldflags
+  fi
+fi
+AC_SUBST(pcre2h)
+AC_SUBST(pcre2_pcre2h)
+])
--- a/src/rule.c
+++ b/src/rule.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2011 and 2012, Dustin Lundquist <dustin@null-ptr.net>
  * Copyright (c) 2011 Manuel Kasper <mk@neon1.net>
+ * Copyright (c) 2017 Syrone Wong <wong.syrone@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -74,18 +75,37 @@ add_rule(struct cork_dllist *rules, rule
     cork_dllist_add(rules, &rule->entries);
 }
 
+/*
+ * On failure init_rule() returns 0 and leaves rule->pattern_re NULL, so a
+ * later call starts again from scratch.
+ * As pattern and subject are char arguments, they can be cast straight to
+ * PCRE2_SPTR because we are working in 8-bit code units.
+ */
 int
 init_rule(rule_t *rule)
 {
     if (rule->pattern_re == NULL) {
-        const char *reerr;
-        int reerroffset;
+        int errornumber;
+        PCRE2_SIZE erroroffset;
+        rule->pattern_re = pcre2_compile((PCRE2_SPTR)rule->pattern,
+                                         PCRE2_ZERO_TERMINATED, 0,
+                                         &errornumber, &erroroffset, NULL);
 
-        rule->pattern_re =
-            pcre_compile(rule->pattern, 0, &reerr, &reerroffset, NULL);
         if (rule->pattern_re == NULL) {
-            LOGE("Regex compilation of \"%s\" failed: %s, offset %d",
-                 rule->pattern, reerr, reerroffset);
+            PCRE2_UCHAR errbuffer[512];
+            pcre2_get_error_message(errornumber, errbuffer, sizeof(errbuffer));
+            LOGE("PCRE2 regex compilation of \"%s\" failed at offset %d: %s",
+                 rule->pattern, (int)erroroffset, errbuffer);
+            return 0;
+        }
+
+        rule->pattern_re_match_data =
+            pcre2_match_data_create_from_pattern(rule->pattern_re, NULL);
+        if (rule->pattern_re_match_data == NULL) {
+            ERROR("PCRE2: the memory for the block could not be obtained");
+            /* Drop the compiled code so the rule is not left half-initialized */
+            pcre2_code_free(rule->pattern_re);
+            rule->pattern_re = NULL;
             return 0;
         }
     }
@@ -105,8 +125,15 @@ lookup_rule(const struct cork_dllist *ru
 
     cork_dllist_foreach_void(rules, curr, next) {
         rule_t *rule = cork_container_of(curr, rule_t, entries);
-        if (pcre_exec(rule->pattern_re, NULL,
-                      name, name_len, 0, 0, NULL, 0) >= 0)
+        if (pcre2_match(
+                rule->pattern_re,            /* the compiled pattern */
+                (PCRE2_SPTR)name,            /* the subject string */
+                name_len,                    /* the length of the subject */
+                0,                           /* start at offset 0 in the subject */
+                0,                           /* default options */
+                rule->pattern_re_match_data, /* block for storing the result */
+                NULL                         /* use default match context */
+                ) >= 0)
             return rule;
     }
 
@@ -127,7 +154,13 @@ free_rule(rule_t *rule)
         return;
 
     ss_free(rule->pattern);
-    if (rule->pattern_re != NULL)
-        pcre_free(rule->pattern_re);
+    if (rule->pattern_re != NULL) {
+        pcre2_code_free(rule->pattern_re);                    /* Release the compiled pattern */
+        rule->pattern_re            = NULL;
+    }
+    if (rule->pattern_re_match_data != NULL) {
+        pcre2_match_data_free(rule->pattern_re_match_data);   /* Release the match data block */
+        rule->pattern_re_match_data = NULL;
+    }
     ss_free(rule);
 }
--- a/src/rule.h
+++ b/src/rule.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2011 and 2012, Dustin Lundquist <dustin@null-ptr.net>
  * Copyright (c) 2011 Manuel Kasper <mk@neon1.net>
+ * Copyright (c) 2017 Syrone Wong <wong.syrone@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,17 +34,27 @@
 
 #include <libcork/ds.h>
 
-#ifdef HAVE_PCRE_H
-#include <pcre.h>
-#elif HAVE_PCRE_PCRE_H
-#include <pcre/pcre.h>
-#endif
+/*
+ * The PCRE2_CODE_UNIT_WIDTH macro must be defined before including pcre2.h.
+ * For a program that uses only one code unit width, setting it to 8, 16, or 32
+ * makes it possible to use generic function names such as pcre2_compile(). Note
+ * that just changing 8 to 16 (for example) is not sufficient to convert this
+ * program to process 16-bit characters. Even in a fully 16-bit environment, where
+ * string-handling functions such as strcmp() and printf() work with 16-bit
+ * characters, the code for handling the table of named substrings will still need
+ * to be modified.
+ */
+/* we only need to support ASCII chartable, thus set it to 8 */
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <pcre2.h>
 
 typedef struct rule {
     char *pattern;
 
     /* Runtime fields */
-    pcre *pattern_re;
+    pcre2_code *pattern_re;
+    pcre2_match_data *pattern_re_match_data;
 
     struct cork_dllist_item entries;
 } rule_t;
